{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# patient\n",
    "\n",
    "The patinet table is a core part of the eICU-CRD and contains all information related to tracking patient unit stays. The table also contains patient demographics and hospital level information."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/alistairewj/.local/lib/python3.5/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use \"pip install psycopg2-binary\" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.\n",
      "  \"\"\")\n"
     ]
    }
   ],
   "source": [
    "# Import libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import psycopg2\n",
    "import getpass\n",
    "import pdvega\n",
    "\n",
    "# for configuring connection \n",
    "from configobj import ConfigObj\n",
    "import os\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Database: eicu\n",
      "Username: alistairewj\n"
     ]
    }
   ],
   "source": [
    "# Create a database connection using settings from config file\n",
    "config='../db/config.ini'\n",
    "\n",
    "# connection info\n",
    "conn_info = dict()\n",
    "if os.path.isfile(config):\n",
    "    config = ConfigObj(config)\n",
    "    conn_info[\"sqluser\"] = config['username']\n",
    "    conn_info[\"sqlpass\"] = config['password']\n",
    "    conn_info[\"sqlhost\"] = config['host']\n",
    "    conn_info[\"sqlport\"] = config['port']\n",
    "    conn_info[\"dbname\"] = config['dbname']\n",
    "    conn_info[\"schema_name\"] = config['schema_name']\n",
    "else:\n",
    "    conn_info[\"sqluser\"] = 'postgres'\n",
    "    conn_info[\"sqlpass\"] = ''\n",
    "    conn_info[\"sqlhost\"] = 'localhost'\n",
    "    conn_info[\"sqlport\"] = 5432\n",
    "    conn_info[\"dbname\"] = 'eicu'\n",
    "    conn_info[\"schema_name\"] = 'public,eicu_crd'\n",
    "    \n",
    "# Connect to the eICU database\n",
    "print('Database: {}'.format(conn_info['dbname']))\n",
    "print('Username: {}'.format(conn_info[\"sqluser\"]))\n",
    "if conn_info[\"sqlpass\"] == '':\n",
    "    # try connecting without password, i.e. peer or OS authentication\n",
    "    try:\n",
    "        if (conn_info[\"sqlhost\"] == 'localhost') & (conn_info[\"sqlport\"]=='5432'):\n",
    "            con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                                   user=conn_info[\"sqluser\"])            \n",
    "        else:\n",
    "            con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                                   host=conn_info[\"sqlhost\"],\n",
    "                                   port=conn_info[\"sqlport\"],\n",
    "                                   user=conn_info[\"sqluser\"])\n",
    "    except:\n",
    "        conn_info[\"sqlpass\"] = getpass.getpass('Password: ')\n",
    "\n",
    "        con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                               host=conn_info[\"sqlhost\"],\n",
    "                               port=conn_info[\"sqlport\"],\n",
    "                               user=conn_info[\"sqluser\"],\n",
    "                               password=conn_info[\"sqlpass\"])\n",
    "query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## uniquePid\n",
    "\n",
    "The `uniquePid` column identifies a single patient across multiple stays. Let's look at a single `uniquepid`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patientunitstayid</th>\n",
       "      <th>patienthealthsystemstayid</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>ethnicity</th>\n",
       "      <th>hospitalid</th>\n",
       "      <th>wardid</th>\n",
       "      <th>apacheadmissiondx</th>\n",
       "      <th>admissionheight</th>\n",
       "      <th>hospitaladmityear</th>\n",
       "      <th>...</th>\n",
       "      <th>unitstaytype</th>\n",
       "      <th>admissionweight</th>\n",
       "      <th>dischargeweight</th>\n",
       "      <th>unitdischargeyear</th>\n",
       "      <th>unitdischargetime24</th>\n",
       "      <th>unitdischargetime</th>\n",
       "      <th>unitdischargeoffset</th>\n",
       "      <th>unitdischargelocation</th>\n",
       "      <th>unitdischargestatus</th>\n",
       "      <th>uniquepid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>141178</td>\n",
       "      <td>128927</td>\n",
       "      <td>Female</td>\n",
       "      <td>52</td>\n",
       "      <td>Caucasian</td>\n",
       "      <td>60</td>\n",
       "      <td>83</td>\n",
       "      <td></td>\n",
       "      <td>162.6</td>\n",
       "      <td>2015</td>\n",
       "      <td>...</td>\n",
       "      <td>admit</td>\n",
       "      <td>54.4</td>\n",
       "      <td>54.4</td>\n",
       "      <td>2015</td>\n",
       "      <td>09:18:00</td>\n",
       "      <td>midday</td>\n",
       "      <td>8</td>\n",
       "      <td>Step-Down Unit (SDU)</td>\n",
       "      <td>Alive</td>\n",
       "      <td>002-33870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>141179</td>\n",
       "      <td>128927</td>\n",
       "      <td>Female</td>\n",
       "      <td>52</td>\n",
       "      <td>Caucasian</td>\n",
       "      <td>60</td>\n",
       "      <td>83</td>\n",
       "      <td></td>\n",
       "      <td>162.6</td>\n",
       "      <td>2015</td>\n",
       "      <td>...</td>\n",
       "      <td>stepdown/other</td>\n",
       "      <td>NaN</td>\n",
       "      <td>60.4</td>\n",
       "      <td>2015</td>\n",
       "      <td>19:20:00</td>\n",
       "      <td>night</td>\n",
       "      <td>2042</td>\n",
       "      <td>Home</td>\n",
       "      <td>Alive</td>\n",
       "      <td>002-33870</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 36 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   patientunitstayid  patienthealthsystemstayid  gender age  ethnicity  \\\n",
       "0             141178                     128927  Female  52  Caucasian   \n",
       "1             141179                     128927  Female  52  Caucasian   \n",
       "\n",
       "   hospitalid  wardid apacheadmissiondx  admissionheight  hospitaladmityear  \\\n",
       "0          60      83                              162.6               2015   \n",
       "1          60      83                              162.6               2015   \n",
       "\n",
       "     ...        unitstaytype admissionweight  dischargeweight  \\\n",
       "0    ...               admit            54.4             54.4   \n",
       "1    ...      stepdown/other             NaN             60.4   \n",
       "\n",
       "  unitdischargeyear  unitdischargetime24 unitdischargetime  \\\n",
       "0              2015             09:18:00            midday   \n",
       "1              2015             19:20:00             night   \n",
       "\n",
       "  unitdischargeoffset  unitdischargelocation unitdischargestatus  uniquepid  \n",
       "0                   8   Step-Down Unit (SDU)               Alive  002-33870  \n",
       "1                2042                   Home               Alive  002-33870  \n",
       "\n",
       "[2 rows x 36 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "uniquepid = '002-33870'\n",
    "query = query_schema + \"\"\"\n",
    "select *\n",
    "from patient\n",
    "where uniquepid = '{}'\n",
    "\"\"\".format(uniquepid)\n",
    "\n",
    "df = pd.read_sql_query(query, con)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we see two unit stays for a single patient. Note also that both unit stays have the same `patienthealthsystemstayid` - this indicates that they occurred within the same hospitalization.\n",
    "\n",
    "We can see the `unitstaytype` was 'admit' for one stay, and 'stepdown/other' for another. Other columns can give us more information."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patientunitstayid</th>\n",
       "      <th>wardid</th>\n",
       "      <th>unittype</th>\n",
       "      <th>unitstaytype</th>\n",
       "      <th>hospitaladmitoffset</th>\n",
       "      <th>unitdischargeoffset</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>141178</td>\n",
       "      <td>83</td>\n",
       "      <td>Med-Surg ICU</td>\n",
       "      <td>admit</td>\n",
       "      <td>-14</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>141179</td>\n",
       "      <td>83</td>\n",
       "      <td>Med-Surg ICU</td>\n",
       "      <td>stepdown/other</td>\n",
       "      <td>-22</td>\n",
       "      <td>2042</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   patientunitstayid  wardid      unittype    unitstaytype  \\\n",
       "0             141178      83  Med-Surg ICU           admit   \n",
       "1             141179      83  Med-Surg ICU  stepdown/other   \n",
       "\n",
       "   hospitaladmitoffset  unitdischargeoffset  \n",
       "0                  -14                    8  \n",
       "1                  -22                 2042  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[['patientunitstayid', 'wardid', 'unittype', 'unitstaytype', 'hospitaladmitoffset', 'unitdischargeoffset']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that it's not explicitly obvious which stay occurred first. Earlier stays will be closer to hospital admission, and therefore have a *higher* hospitaladmitoffset. Above, the stay with a `hospitaladmitoffset` of -14 was first (occurring 14 minutes after hospital admission), followed by the next stay with a `hospitaladmitoffset` of 22 (which occurred 22 minutes after hospital admission). Practically, we wouldn't consider the first admission a \"real\" ICU stay, and it's likely an idiosyncrasy of the administration system at this particular hospital. Notice how both rows have the same `wardid`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Age\n",
    "\n",
    "As ages over 89 are required to be deidentified by HIPAA, the `age` column is actually a string field, with ages over 89 replaced with the string value '> 89'."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>n</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>&gt; 89</td>\n",
       "      <td>7081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>67</td>\n",
       "      <td>5078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>68</td>\n",
       "      <td>4826</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>72</td>\n",
       "      <td>4804</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>71</td>\n",
       "      <td>4764</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    age     n\n",
       "0  > 89  7081\n",
       "1    67  5078\n",
       "2    68  4826\n",
       "3    72  4804\n",
       "4    71  4764"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = query_schema + \"\"\"\n",
    "select age, count(*) as n\n",
    "from patient\n",
    "group by age\n",
    "order by n desc\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql_query(query, con)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As is common in eICU-CRD, there are a subset of hospitals who routinely utilize this portion of the medical record (and thus have 90-100% data completion), while there are other hospitals who rarely use this interface and thus have poor data completion (0-10%)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
